; Calculating Bspline/Bezier/Catmull Rom/Hermite curves procedures
; implementation in Fasm by macgub
; web    : http://macgub.co.pl
call_l_pipe:
; Build the long pipe mesh from the curve vertices that are already stored
; behind [long_pipe_vert_ptr].
; Gathers the arguments of do_long_pipe from global variables, calls it and
; stores the returned counts, then calls calc_combo with eax = 'firs'.
;
; do_long_pipe interface used here:
;   in:  esi  - ptr to curve vertices - middle points of pipe
;        ecx  - curve vertices count
;        eax  - rotary steps count as integer
;        xmm0 - radius, lowest dword float
;        ebx  - ptr to triangles list
;        edi  - ptr to pipe vertices
;   out: ecx  - triangles count
;        ebx  - vertices count

                ; ---- curve vertices count -> ecx
                mov      ecx,[long_pipe_segs_c]
                cmp      [lpcurve_tp],0          ; curve type 0 keeps the segment count as it is
                je       .clp_segs_ready
                lea      ecx,[ecx+ecx*2]         ; every other curve type: three times as many segments
  .clp_segs_ready:
                imul     ecx,[long_pipe_one_seg_c]

                ; ---- radius -> xmm0, derived from the approximation tolerancy flag
                movzx    edx,[tolerancy_flag]
                imul     edx,6
                cvtsi2ss xmm0,edx

                ; ---- remaining arguments
                mov      esi,[long_pipe_vert_ptr]
                mov      edi,[points_r_ptr]
                mov      ebx,[triangles_ptr]
                mov      eax,9                   ; rotary count per one turn
                call     do_long_pipe

                ; ---- publish the results
                mov      [points_count_var],ebx
                mov      [triangles_count_var],ecx
                mov      eax,'firs'
                call     calc_combo     ; no mem work  !!
ret
;============================================================
init_def_derv:
; Attempt for making deformation derives
; (using splines) - to easier navigate
; deformation vertices process..
; I tried at first:
;  - bend
;
; in:
;     ecx = vertices count
;     esi = vertices begin ptr (12 bytes per vertex, dword floats)
;     edi = ptr to result derives (12 bytes step per entry, only the
;           first two floats of each entry are written here)
;     eax = derives count (the emitting loop runs at most 4 * eax times)
;
; How it works:
;   1. normalize_object is asked ('max') for the bounding box, the x extent
;      is divided into 4 * eax slices -> .dl, and .dl2 = .dl * 0.1.
;   2. x and y of every vertex are biased by .fact, converted to integers
;      and stored as 8 byte records in a temporary list, which is then
;      passed to sort_hybrid together with a second scratch list.
;   3. The list is scanned once; for every window (pos - .dl2, pos + .dl]
;      the middle of the min/max of the scanned records is emitted.
;
; Both temporary lists are released before return.
    push      ebp
    mov       ebp,esp
    sub       esp,63
    and       ebp,-16    ; locals are addressed from a 16 byte aligned base

    .der_cnt  equ [ebp-4]
    .v_cnt    equ [ebp-8]
    .v_ptr    equ [ebp-12]
    .der_ptr  equ [ebp-16]
    .list     equ [ebp-20]
    .list2    equ [ebp-24]
    .list_en  equ [ebp-36]
    .fact     equ [ebp-40]
    .dl       equ [ebp-32]  ; delta
    .dl2      equ [ebp-48]  ; delta2

    mov       .der_cnt,eax
    mov       .v_cnt,ecx
    mov       .v_ptr,esi
    mov       .der_ptr,edi
    mov       .fact ,dword 10000.0
    mov       edi,esi    ;.vert_offs
    mov       eax,'max'  ;only center translate - find max min also !!
    call      normalize_object
    ; xm1 = max x y z - if 'max' param
    ; xm2 = min x y z
    subps     xmm1,xmm2
    mulps     xmm1,xmm1
    sqrtps    xmm1,xmm1  ;abs value
    xorps     xmm7,xmm7
    movlhps   xmm1,xmm7
    shufps    xmm1,xmm1,11111100b ; keep only the x extent, other lanes = 0

    mov       ecx,.der_cnt
    shl       ecx,2
    push      ecx        ; 4 * derives count, popped as the emit loop counter
    cvtsi2ss  xmm2,ecx
    shufps    xmm2,xmm2,0
    rcpps     xmm2,xmm2  ; approximate reciprocal
    mulps     xmm2,xmm1
    movlps    .dl,xmm2   ;.dl = lenght / cnt
    mov       ecx,0.1
    movd      xmm1,ecx
    shufps    xmm1,xmm1,0
    mulps     xmm1,xmm2
    ; store the scaled value (xmm1); storing xmm2 here would make
    ; .dl2 equal to .dl and leave the multiplication above unused
    movlps    .dl2,xmm1  ;.dl2 = (lenght / cnt) * 0.1
    ; delta according to "x" position
    mov       ebx,.v_cnt
    add       ebx,40
    shl       ebx,3      ; 8 bytes per record, 40 spare records
    malloc    ebx
    mov       .list,eax
    add       eax,ebx
    sub       eax,40
    mov       .list_en,eax ; scan limit = list start + size - 40 bytes
    malloc     ebx
    mov       .list2,eax

    ; fill the list: (x + fact, y + fact) of every vertex as two integers
    movlps    xmm1,.fact
    shufps    xmm1,xmm1,0
    mov       eax,.list
    mov       ecx,.v_cnt
    mov       esi,.v_ptr
  @@:
    movlps    xmm0,[esi]
    addps     xmm0,xmm1
    cvtps2dq  xmm0,xmm0
    movlps    [eax],xmm0
    add       eax,8
    add       esi,12
    loop      @b
    mov       esi,.list
    mov       edi,.list2
    mov       ecx,.v_cnt
    call      sort_hybrid

    mov       esi,.list
    mov       edi,.der_ptr
    pop       ecx        ; ecx = 4 * derives count

    movlps    xmm1,.fact ;.fact = 10000.0
    shufps    xmm1,xmm1,0
    cvtpi2ps  xmm3,[esi] ; xm3 = current window position, starts at first record

  .bb1:
    movaps    xmm7,xmm3
    movaps    xmm6,xmm3
    movaps    xmm4,xmm1
    addps     xmm4,xmm1  ;xm4 = 2 * .fact, start value for the minimum
    xorps     xmm5,xmm5
    subps     xmm5,xmm1  ;xm5 = - .fact, start value for the maximum
    subps     xmm6,.dl2  ;xm6 = lower window limit (lane 0)
    addps     xmm7,.dl   ;xm7 = upper window limit (lane 0)
  .bb2:
    cvtpi2ps  xmm2,[esi]
    movaps    xmm0,xmm2
    maxps     xmm5,xmm2
    minps     xmm4,xmm2
    cmpleps   xmm2,xmm7
    cmpleps   xmm0,xmm6
    xorps     xmm2,xmm0  ; set where lower limit < value <= upper limit
    add       esi,8
    cmp       esi,.list_en
    ja        .en1
    movmskps  eax,xmm2
 ;  and       eax,11b
    bt        eax,0      ; only the x lane decides
    jc        .bb2
 ;  or        eax,eax
 ;  jnz       .bb2
    ; der ptr =  coords as floats x, y, z 1000 factored
    subps     xmm5,xmm1  ; remove the .fact bias from max
    subps     xmm4,xmm1  ; and from min
    xorps     xmm6,xmm6
    addps     xmm4,xmm5
    mulps     xmm4,[f05x3] ; (min + max) scaled by the f05x3 constant
    movlhps   xmm4,xmm6
    movlps    [edi],xmm4 ; only x and y are stored
    addps     xmm3,.dl   ; advance the window
    add       edi,12
    loop      .bb1
  .en1:
    mfree     .list
    mfree     .list2
    add       esp,63
    pop       ebp

ret

;=========================================================================
init_long_pipe_derives:
; procedure make circle using all derives vertices
; The single vertex at esi is rotated by k * .one_d for k = 0 .. ecx-1
; and every result is written as a 12 byte vertex to the destination.
; in: esi = first derive vertex
;     edi = ptr to destination derives
;     ecx = derives vertices count
;     edx != 0 - add noise
;     out:
;     edi = end
; NOTE(review): when noise is enabled the loop leaves edi 12 bytes lower
; (see "sub edi,12" below) - confirm what callers expect in that case.
; NOTE(review): ecx = 0 is not handled, the loop only ends when .cnt2
; becomes equal to ecx.

    or       edx,edx
    jz       .f
    pushad   ;eax ebx ecx edx esi edi
    call     create_noise  ; special for hrt object
    popad    ;edi esi edx ecx ebx eax
  .f:
    push     ebp
    mov      ebp,esp
    sub      esp,104
    ; Several slots below are written in pairs with 8 byte stores
    ; (movlps), so each pair of neighbours forms one two-float record.
    .cnt     equ [ebp-4]
    .one_d   equ [ebp-8]
    .sinx    equ dword[ebp-12]
    .cosx    equ [ebp-16]
    .sinylp  equ dword[ebp-20]
    .cosylp  equ [ebp-24]
    .sinzlp  equ dword[ebp-28]
    .coszlp  equ [ebp-32]
    .mx      equ [ebp-68]
    .cnt2    equ [ebp-72]
    .eesi    equ dword[ebp-76]
    .eedi    equ dword[ebp-80]
    .factLP  equ [ebp-96]
    .noise   equ dword[ebp-100]

    mov     .noise,edx
    mov     .eesi,esi
    mov     .eedi,edi
  @@:
    mov     .cnt,ecx
    or       edx,edx
    jz       @f
    shr      dword .cnt,3   ; with noise: angle step is based on count / 8
  @@:
    shr      dword .cnt,1
    ; .one_d = pi / .cnt   (only lane 0 of the packed operations is used)
    push     dword 3.141592653
    movlps   xmm0,[esp]
    add      esp,4
    cvtpi2ps xmm1,.cnt
    divps    xmm0,xmm1
    movss    .one_d,xmm0
    ; two fixed sin_cos results: one for the angle .one_d, one for angle 0
    ; (presumably sin_cos returns a sine/cosine pair in the low two floats
    ; of xmm0 - verify against its definition)
    call     sin_cos
    movlps   .cosylp,xmm0
    xorps    xmm0,xmm0
    call     sin_cos
    movlps   .coszlp,xmm0
    xor      eax,eax
    mov      dword .cnt2,eax
    mov      ebx,noise_buff
    ; noise scale factor, approximately 1/1000 in lanes 0..2
    mov      eax,1000
    cvtsi2ss xmm7,eax
    rcpps    xmm7,xmm7
    shufps   xmm7,xmm7,11000000b
    movups   .factLP,xmm7    ; factor1

 .bbb2:
    push     ecx             ; total count, compared with .cnt2 at loop end
    push     ebx             ; current position in noise_buff
    ; angle of this step = .one_d * .cnt2, evaluated on the FPU
    fninit
    fld      dword .one_d
    fimul    dword .cnt2
    fsincos
    ; fsincos leaves the cosine in ST0 and the sine in ST1, so the slot
    ; named .sinx receives the cosine and .cosx the sine; they are read
    ; back below as one pair starting at .cosx
    fstp     .sinx
    fstp     dword .cosx

;   pushad
;   movss     xmm0,.one_d   ;   Whats a pity with this
;   cvtsi2ss  xmm1,.cnt2    ;   hrt object generating proc
;   mulps     xmm0,xmm1     ;   wont work properly....
;   call      sin_cos       ;   I use coprocessor instead..
;   movlps    .cosx,xmm0    ; <-------
;   popad


   ; build the rotation matrix at .mx from the three pairs
   movlps    xmm1,.cosx
   movhps    xmm0,.coszlp
   movlps    xmm0,.cosylp
   lea       edi,.mx
   call      make_rotation_matrix

   ; rotate the single source vertex into the current destination slot
   mov       ecx,1
   mov       esi,.eesi
   mov       edi,.eedi
   lea       ebx,.mx
   call      rotary
   xor       eax,eax
   pop       ebx
   cmp       .noise,eax
   je        @f
   ; noise: take 4 signed words from the noise buffer, sign extend them
   ; to dwords, convert to float, scale by .factLP and add them to the
   ; vertex that was just written
   sub       edi,12          ; back to the vertex just written (rotary presumably advances edi)
   movlps    xmm0,[ebx]
   xorps     xmm1,xmm1
   pcmpgtw   xmm1,xmm0
   punpcklwd xmm0,xmm1
   cvtdq2ps  xmm0,xmm0
   movups    xmm7,.factLP
   mulps     xmm0,xmm7
   movups    xmm1,[edi]      ; 16 byte access: also touches the dword after the vertex
   addps     xmm1,xmm0
   movups    [edi],xmm1
;  add       ebx,30 * 6
   add       ebx,6           ; next three noise words
 @@:
   add       .eedi,12        ; next destination vertex
   pop       ecx
   inc       dword .cnt2
   cmp       ecx,.cnt2
   jnz       .bbb2

   mov       esp,ebp
   pop       ebp
ret
;============================================================================
calc_long_curve:
; Evaluate a whole multi segment curve by calling one of the four point
; evaluators (calc_Bezier_curve, calc_Bspline, calc_Rom_Catm_spline,
; calc_Hermite) repeatedly; every call produces 4 vertices (48 bytes).
; in:
;  eax - curve one segment steps count, must be divisable by 4
;  ebx - curve segments count
;  ecx - curve derives ptr, Bspline - must end redundand
;        first two derives vertices (closed curve)
;  edx - curve_vertices ptr, must be allocated enough memory
;  esi - curve type, 0 = Bezier, 1 = bspline, 2 = Cat-Rom, 3 = Hermite

; out:
;  ecx - curve lenght   ??
;        (the value returned is .cnt, which is increased by 4 after
;         every evaluator call, i.e. the number of vertices written)
; NOTE(review): ebx + 1 segments are evaluated ("inc ecx" below), and a
; steps count below 4 makes the inner counter start at 0 - confirm both
; are intended.
 
    push    ebp
    mov     ebp,esp
    sub     esp,98
    and     ebp,-16     ; locals are addressed from a 16 byte aligned base

    .step4                 equ [ebp-16]
    .t1                    equ [ebp-32]   ; t1, t2, t3, t4
    .Bspline_one_seg_count equ dword[ebp-36]
    .pr_curve              equ dword[ebp-40]
    .addd                  equ dword[ebp-44]
    .Bspline_derives_ptr   equ dword[ebp-48]
    .init_t_value          equ [ebp-64]
    .curv_type             equ word[ebp-68]
    .cnt                   equ dword[ebp-72]
    .Bspline_segs_count    equ [ebp-76]

    mov      .Bspline_one_seg_count,eax
    mov      .Bspline_derives_ptr,ecx
    mov      .Bspline_segs_count,ebx
    mov      .curv_type,si       ; stored only, not read again in this procedure
    ; curve -  Bez or Bspline one segement steps count,
    ;     must be divisable by 4  !!
    cvtsi2ss xmm0,eax
    rcpss    xmm0,xmm0           ; approximate 1 / steps
    shufps   xmm0,xmm0,0
    movaps   xmm1,xmm0
    mov      eax,4
    mov      ecx,12
    cvtsi2ss xmm4,eax
    mov      edi,36
    or       esi,esi
    shufps   xmm4,xmm4,0
    cmovnz   edi,ecx
    ; derives step per segment: 36 bytes for type 0, 12 bytes otherwise
    mov      .addd,edi
;   cmp     si,3
;   cmovne  edi,ecx
    push     ebx
;   mov     .addd,edi
    ; pick the evaluator: type 0 -> calc_Bezier_curve, 2 -> calc_Rom_Catm_spline,
    ; 3 -> calc_Hermite, every other non zero value -> calc_Bspline
    mov      edi,calc_Bspline
    mov      ecx,calc_Bezier_curve
    mov      ebx,calc_Rom_Catm_spline
    mov      eax,calc_Hermite
    or       esi,esi
    cmovnz   ecx,edi
    cmp      si,2
    cmove    ecx,ebx
    cmp      si,3
    cmove    ecx,eax
    mov      .pr_curve,ecx
    pop      ebx
    mulps    xmm0,xmm4  ;[const4]
    movaps   .step4,xmm0         ; t increment per evaluator call = 4 / steps
    mulps    xmm1,[const1234]
    movaps   .init_t_value,xmm1  ; first four t values (const1234 / steps)
    xor       edi,edi
    mov      .cnt,edi
    movaps   .t1,xmm1
    mov      ecx,.Bspline_segs_count
    inc      ecx
    ; Bspline segments count
    mov      ebx,.Bspline_derives_ptr  ; must end redundand
                                       ; first two derives vertices
    lea      esi,.t1
    mov      edi,edx  ;.Bspline_vertices_ptr    ; must be alloc enough mem
 .next_seg:
    ; every segment starts again from the initial t values
    movaps   xmm0,.init_t_value
    movaps   .t1,xmm0
    mov      eax,.Bspline_one_seg_count
    shr      eax,2               ; evaluator calls per segment
 .nexxt:
    ; evaluator input: ebx = derives, esi = ptr to t values, edi = output;
    ; pushad/popad keep all general registers intact across the call
    pushad
    call     .pr_curve
    popad
    add      .cnt,4
    add      edi,48
    movaps   xmm0,.t1
    addps    xmm0,.step4
    movaps   .t1,xmm0
    dec      eax
    jnz      .nexxt
    add      ebx,.addd    ; offset in Bspline/Bezier curv derives list
    loop     .next_seg
    mov      ecx,.cnt
    add      esp,98
    pop      ebp
ret
;=========================================================================
calc_Hermite:
; Evaluate one Hermite spline segment (with tension and bias) for four
; parameter values at once.
; in:
;      ebx - derives: dword float x0,y0,z0,x1,y1,z1
;                                 x2,y2,z2,x3,y3,z3
;            (four consecutive control points p0..p3, the curve is
;             interpolated between p1 and p2)
;      esi - ptr to t1,t2,t3,t4 as dword floats
;      edi - ptr to output buffer
; out:
;      [edi] - 4 points (vertices) as dword float x,y,z - 48 bytes,
;              one point per t value
; Tension and bias are taken from the global [NextScaleXY]:
;      tension = (high word - BXRES/2) / 40
;      bias    = (low  word - BXRES/2) / 40
;      (the division uses rcpps, so 1/40 is an approximation)
; Registers: ebx and ecx are preserved; eax, edx, edi, xmm0-xmm7 are changed.
;
;   y = (a0*y1+a1*m0+a2*m1+a3*y2)

  push      ebp
  mov       ebp,esp
  sub       esp,144
  and       ebp,-16        ; locals are addressed from a 16 byte aligned base

  .m0        equ    [ebp-16]
  .m1        equ    [ebp-32]
  .tensionm1 equ    [ebp-128]   ; 1 - tension
  .biasp1    equ    [ebp-124]   ; 1 + bias
  .biasm1    equ    [ebp-120]   ; 1 - bias

   mov       eax,40
   cvtsi2ss  xmm6,eax
   shufps    xmm6,xmm6,0
   rcpps     xmm6,xmm6        ; xm6 = ~1/40 in every lane
   push      ebx
   mov       ebx,BXRES/2
   movaps    xmm4,[the_one]
                              ; xm6 - unit
   mov       edx,[NextScaleXY]
   movzx     eax,dx
   sub       eax,ebx
   cwde                       ; keep the signed 16 bit difference
   push      eax              ; low word  - BXRES/2  -> bias source
   mov       eax,edx
   shr       eax,16
   sub       eax,ebx  ;BXRES/2
   cwde
   push      eax              ; high word - BXRES/2  -> tension source
   cvtpi2ps  xmm5,[esp]       ; xm5 = (high, low, ..)
   shufps    xmm5,xmm5,01010100b ; xm5 = (high, low, low, low)
   mulps     xmm6,xmm5        ; xm6 = (tension, bias, bias, bias)
   add       esp,8
   pop       ebx
   addsubps  xmm4,xmm6   ; 1 st,3 rd -> sub, 2cond  -> add
   ; xm4 = (1-tension, 1+bias, 1-bias, 1+bias)
   movups    .tensionm1,xmm4

;   tension :
;   bias    : tilt of curve
;   t^2 = t*t
;   t^3 = t*t*t
;
;   m0 = (y1-y0)*(1+bias)*(1-tension)/2 + (y2-y1)*(1-bias)*(1-tension)/2
;   m1 = (y2-y1)*(1+bias)*(1-tension)/2 + (y3-y2)*(1-bias)*(1-tension)/2
;   a0 =  2*t^3 - 3*t^2 + 1;
;   a1 =    t^3 - 2*t^2 + t;
;   a2 =    t^3 -   t^2;
;   a3 = -2*t^3 + 3*t^2;

   movups   xmm0,[esi]
   movaps   xmm1,xmm0
   movaps   xmm2,xmm0      ; xm0 = t
   mulps    xmm2,xmm2      ; xm2 = t^2
   mulps    xmm1,xmm2      ; xm1 = t^3
   movups   xmm3,[ebx]     ; 0
   movups   xmm7,[ebx+12]  ; 1
   movups   xmm5,[ebx+24]  ; 2
   movups   xmm6,[ebx+36]  ; 3

   subps    xmm6,xmm5      ; xm6 = xyz3 - xyz2
   subps    xmm5,xmm7      ; xm5 = xyz2 - xyz1
   subps    xmm7,xmm3      ; xm7 = xyz1 - xyz0
   movaps   xmm4,xmm5      ; xm4 = xyz2 - xyz1 (second copy)
   movlps   xmm0,.biasm1
   shufps   xmm0,xmm0,0
   mulps    xmm4,xmm0      ; xm4 = (xyz2 - xyz1) * (1-bias)
   mulps    xmm6,xmm0      ; xm6 = (xyz3 - xyz2) * (1-bias)
   movlps   xmm0,.biasp1
   shufps   xmm0,xmm0,0
   mulps    xmm5,xmm0      ; xm5 = (xyz2 - xyz1) * (1+bias)
   mulps    xmm7,xmm0      ; xm7 = (xyz1 - xyz0) * (1+bias)

   ; each tangent mixes one (1+bias) term with one (1-bias) term,
   ; exactly as in the m0 / m1 formulas above
   addps    xmm7,xmm4      ; xm7 = m0 xyz = (xyz1-xyz0)*(1+bias) + (xyz2-xyz1)*(1-bias)
   addps    xmm6,xmm5      ; xm6 = m1 xyz = (xyz2-xyz1)*(1+bias) + (xyz3-xyz2)*(1-bias)
   movaps   xmm4,[the_one]
   movaps   xmm5,xmm4
   movaps   xmm3,xmm4
   addps    xmm5,xmm4      ; xm5 = 2.0
   addps    xmm3,xmm5      ; xm3 = 3.0
   divps    xmm7,xmm5
   divps    xmm6,xmm5

   movlps   xmm0,.tensionm1
   shufps   xmm0,xmm0,0
   mulps    xmm7,xmm0      ; m0 *= (1-tension)
   mulps    xmm6,xmm0      ; m1 *= (1-tension)
   movaps   .m0,xmm7
   movaps   .m1,xmm6
   movaps   xmm7,xmm1      ; t^3
   movaps   xmm6,xmm2      ; t^2
   mulps    xmm7,xmm5      ; xm7 = 2 * t^3
   mulps    xmm6,xmm3      ; xm6 = 3 * t^2
;   a0 =  2*t^3 - 3*t^2 + 1
;   a1 =    t^3 - 2*t^2 + t
;   a2 =    t^3 -   t^2
;   a3 = -2*t^3 + 3*t^2
   movups   xmm0,[esi]
   addps    xmm4,xmm7
   subps    xmm4,xmm6      ; xm4 = a0          ; t1234
   subps    xmm6,xmm7      ; xm6 = a3          ; t1234
   mulps    xmm5,xmm2      ; xm5 = 2 * t^2
   subps    xmm0,xmm5      ; xm0 = t - 2 * t^2
   addps    xmm0,xmm1      ; xm0 = a1
   subps    xmm1,xmm2      ; xm1 = a2          ; t1234

   ; a0..a3 are stored in descending addresses, 16 bytes apart, so the
   ; loop below reaches a1, a2, a3 of the same t with fixed offsets
   .a0      equ[ebp-48]
   .a1      equ[ebp-64]
   .a2      equ[ebp-80]
   .a3      equ[ebp-96]

   movaps   .a0,xmm4
   movaps   .a1,xmm0
   movaps   .a2,xmm1
   movaps   .a3,xmm6
;  x y z = (a0*xyz1+a1*m0+a2*m1+a3*xyz2)
;  a0123    - depending on 't'
;  m0, m1   - depending on derives, bias, tension

   lea      eax,.a0
   push     ecx
   mov      ecx,4          ; one pass per t value
 .ll_her:
   movups   xmm0,[ebx+12]  ; xyz1
   movups   xmm4,[eax]     ; a0 of the current t in lane 0
   movups   xmm5,[eax-16]  ; a1
   movups   xmm2,[eax-32]  ; a2
   movups   xmm6,[eax-48]  ; a3

   shufps   xmm4,xmm4,0    ; t1/2/3/4 dep brdc
   shufps   xmm5,xmm5,0    ;    ''
   shufps   xmm2,xmm2,0    ; t1/2/3/4 dep brdc
   shufps   xmm6,xmm6,0    ;    ''
   mulps    xmm0,xmm4      ; xyz1*a0
   mulps    xmm5,.m0       ; m0*a1
   mulps    xmm2,.m1       ; m1*a2
   movups   xmm1,[ebx+24]
   mulps    xmm1,xmm6      ; a3*xyz2
   addps    xmm0,xmm5
   addps    xmm2,xmm1
   addps    xmm0,xmm2      ; xm0 =  desired xyz for this t
   movlps   [edi],xmm0     ; store x, y
   movhlps  xmm0,xmm0
   movss    [edi+8],xmm0   ; store z
   add      edi,12
   add      eax,4          ; next t lane
   loop     .ll_her
   pop      ecx

   add      esp,144
   pop      ebp
ret
;===========================================================================
calc_Bspline:
; Uniform cubic B-spline, four parameter values per call.
;
; in  ebx - derives: dword float x1,y1,z1,x2,y2,z2
;                                x3,y3,z3,x4,y4,z4
;     esi - aligned t1,t2,t3,t4
; out edi - points (vertices) as dword float x,y,z,
;           written by crv from the weights prepared here
;
; weights (one lane per t value):
;     a = 1/6*(1 - 3*t + 3*t^2 -   t^3)
;     b = 1/6*(4       - 6*t^2 + 3*t^3)
;     c = 1/6*(1 + 3*t + 3*t^2 - 3*t^3)
;     d = 1/6*(                    t^3)
; 1/6 comes from rcpps, so it is an approximation.

  .a1    equ [ebp-16]
  .b1    equ [ebp-32]
  .c1    equ [ebp-48]
  .d1    equ [ebp-64]

  push  ebp
  mov   ebp,esp
  sub   esp,80
  and   ebp,-16               ; weights live at a 16 byte aligned base, crv reads them via ebp

  ; ---- constants: xm4 = 1, xm5 = 3, xm6 = ~1/6
  movaps  xmm4,[const1]
  movaps  xmm5,xmm4
  addps   xmm5,xmm4
  addps   xmm5,xmm4
  movaps  xmm6,xmm5
  addps   xmm6,xmm6
  rcpps   xmm6,xmm6

  ; ---- powers of t: xm0 = 3t, xm1 = 3t^2, xm2 = t^3, xm3 = 3t^3
  movaps  xmm0,[esi]
  movaps  xmm1,xmm0
  mulps   xmm1,xmm1
  movaps  xmm2,xmm1
  mulps   xmm2,xmm0
  mulps   xmm1,xmm5
  mulps   xmm0,xmm5
  movaps  xmm3,xmm2
  mulps   xmm3,xmm5

  ; ---- a
  movaps  xmm7,xmm4
  subps   xmm7,xmm0
  addps   xmm7,xmm1
  subps   xmm7,xmm2
  mulps   xmm7,xmm6
  movaps  .a1,xmm7

  ; ---- b
  movaps  xmm7,[const4]
  subps   xmm7,xmm1
  subps   xmm7,xmm1
  addps   xmm7,xmm3
  mulps   xmm7,xmm6
  movaps  .b1,xmm7

  ; ---- c
  movaps  xmm7,xmm4
  addps   xmm7,xmm0
  addps   xmm7,xmm1
  subps   xmm7,xmm3
  mulps   xmm7,xmm6
  movaps  .c1,xmm7

  ; ---- d
  mulps   xmm2,xmm6
  movaps  .d1,xmm2

  call    crv                 ; combine the weights with the derives

  add       esp,80
  pop       ebp
ret
;===========================================================================
calc_Rom_Catm_spline:
; Catmull-Rom spline, four parameter values per call.
;
; in  ebx - derives: dword float x1,y1,z1,x2,y2,z2
;                                x3,y3,z3,x4,y4,z4
;     esi - aligned t1,t2,t3,t4
; out edi - points (vertices) as dword float x,y,z,
;           written by crv from the weights prepared here
;
; weights (one lane per t value), h = rcpps(2):
;     a = h * ( -t^3 + 2t^2 - t )
;     b = h * ( 3t^3 - 5t^2 + 2 )
;     c = h * (-3t^3 + 4t^2 + t )
;     d = h * (  t^3 -  t^2     )
;
; The "factors only" mode (eax = 'of', result in xmm0) that older
; comments mention is not implemented; .mark is reserved for it.

  .a1     equ [ebp-16]
  .b1     equ [ebp-32]
  .c1     equ [ebp-48]
  .d1     equ [ebp-64]
  .mark   equ [ebp-68]

  push    ebp
  mov     ebp,esp
  sub     esp,84
  and     ebp,-16             ; weights live at a 16 byte aligned base, crv reads them via ebp

  ; ---- inputs and constants
  movaps   xmm0,[esi]         ; xm0 = t
  movaps   xmm6,[f2x4]        ; xm6 = 2
  movaps   xmm5,[const3]      ; xm5 = 3
  rcpps    xmm7,xmm6          ; xm7 = h
  movaps   xmm1,xmm0
  mulps    xmm1,xmm1          ; xm1 = t^2
  movaps   xmm2,xmm0
  mulps    xmm2,xmm1          ; xm2 = t^3

  ; ---- d
  movaps   xmm3,xmm2
  subps    xmm3,xmm1
  mulps    xmm3,xmm7
  movaps   .d1,xmm3

  ; ---- a
  movaps   xmm3,xmm1
  mulps    xmm3,xmm6          ; 2 * t^2
  subps    xmm3,xmm2
  subps    xmm3,xmm0
  mulps    xmm3,xmm7
  movaps   .a1,xmm3

  ; ---- b
  mulps    xmm2,xmm5          ; xm2 = 3 * t^3 from here on
  movaps   xmm3,xmm6
  addps    xmm3,xmm2          ; 2 + 3 * t^3
  addps    xmm5,xmm6          ; xm5 = 5
  mulps    xmm5,xmm1          ; 5 * t^2
  subps    xmm3,xmm5
  mulps    xmm3,xmm7
  movaps   .b1,xmm3

  ; ---- c
  addps    xmm6,xmm6          ; xm6 = 4
  mulps    xmm6,xmm1          ; 4 * t^2
  subps    xmm6,xmm2
  addps    xmm6,xmm0
  mulps    xmm6,xmm7
  movaps   .c1,xmm6

  call     crv                ; combine the weights with the derives
 .end:
  add      esp,84
  pop      ebp
ret
;==================================================================
;==================================================================
calc_Bezier_curve:
; Cubic Bezier curve, four parameter values per call.
;
; in  ebx - derives: dword float x1,y1,z1,x2,y2,z2
;                                x3,y3,z3,x4,y4,z4
;     esi - aligned t1,t2,t3,t4
; out edi - points (vertices) as dword float x,y,z,
;           written by crv from the weights prepared here
;
; weights (one lane per t value), u = 1 - t:
;     a = u^3
;     b = 3 * u^2 * t
;     c = 3 * u   * t^2
;     d = t^3
;
; to do (not implemented):
;     if  eax = 'bend'  - only calc bend values:
;         ebx - derives, xmm0 - t value
;         out: edi - only one point float x, y, z

      .a1   equ [ebp-16]
      .b1   equ [ebp-32]
      .c1   equ [ebp-48]
      .d1   equ [ebp-64]

      push  ebp
      mov   ebp,esp
      sub   esp,80
      and   ebp,-16          ; weights live at a 16 byte aligned base, crv reads them via ebp

      movaps   xmm2,[esi]        ; xm2 = t
      movaps   xmm3,[const1]
      subps    xmm3,xmm2         ; xm3 = u
      movaps   xmm4,xmm3
      mulps    xmm4,xmm4         ; xm4 = u^2

      ; ---- a
      movaps   xmm0,xmm3
      mulps    xmm0,xmm4
      movaps   .a1,xmm0

      ; ---- b
      movaps   xmm5,[const3]     ; xm5 = 3
      mulps    xmm4,xmm5
      mulps    xmm4,xmm2
      movaps   .b1,xmm4

      ; ---- c
      mulps    xmm3,xmm5         ; 3 * u
      movaps   xmm1,xmm2
      mulps    xmm1,xmm1         ; xm1 = t^2
      mulps    xmm3,xmm1
      movaps   .c1,xmm3

      ; ---- d
      mulps    xmm1,xmm2
      movaps   .d1,xmm1

      call     crv               ; combine the weights with the derives
      add      esp,80
      pop      ebp
ret
;=========================================================
crv:
; Shared tail of calc_Bspline / calc_Rom_Catm_spline / calc_Bezier_curve.
; Combines four control points with four weight vectors and stores the
; four resulting points.
; in  ebx - four control points, 3 x dword float each (48 bytes)
;     edi - output buffer, 48 bytes are written
;     ebp - frame base of the CALLER: the weights are read from
;           [ebp-16] (a), [ebp-32] (b), [ebp-48] (c), [ebp-64] (d),
;           each holding one value per t in its four lanes
; out [edi] - x,y,z of point 1, point 2, point 3, point 4
; Changes xmm0-xmm7; no general register is modified.
  .a1       equ [ebp-16]
  .b1       equ [ebp-32]
  .c1       equ [ebp-48]
  .d1       equ [ebp-64]

  ; ---- x coordinate of all four output points
  movups    xmm0,[ebx]        ; lane 0 = x1, lane 3 = x2
  movups    xmm2,[ebx+24]     ; lane 0 = x3, lane 3 = x4
  movaps    xmm1,xmm0
  movaps    xmm3,xmm2
  shufps    xmm0,xmm0,0
  shufps    xmm1,xmm1,11111111b
  shufps    xmm2,xmm2,0
  shufps    xmm3,xmm3,11111111b
  mulps     xmm0,.a1       ;   px = a*x1 + b*x2 + c*x3 + d*x4
  mulps     xmm1,.b1       ;   py = a*y1 + b*y2 + c*y3 + d*y4
  mulps     xmm2,.c1
  mulps     xmm3,.d1
  addps     xmm0,xmm1
  addps     xmm2,xmm3
  addps     xmm0,xmm2
  movaps    xmm7,xmm0         ; xm7 = px for t1..t4
  movhlps   xmm6,xmm0         ; xm6 low half = px for t3,t4

  ; ---- y coordinate (same scheme, source shifted by 4 bytes)
  movups    xmm0,[ebx+4]
  movups    xmm2,[ebx+28]
  movaps    xmm1,xmm0
  movaps    xmm3,xmm2
  shufps    xmm0,xmm0,0
  shufps    xmm1,xmm1,11111111b
  shufps    xmm2,xmm2,0
  shufps    xmm3,xmm3,11111111b
  mulps     xmm0,.a1
  mulps     xmm1,.b1
  mulps     xmm2,.c1
  mulps     xmm3,.d1
  addps     xmm0,xmm1
  addps     xmm2,xmm3
  addps     xmm0,xmm2
  punpckldq xmm7,xmm0         ; xm7 = px1,py1,px2,py2
  movhlps   xmm0,xmm0
  punpckldq xmm6,xmm0         ; xm6 = px3,py3,px4,py4

  ; ---- z coordinate (source shifted by 8 bytes)
  movups    xmm0,[ebx+8]
  movups    xmm2,[ebx+32]
  movaps    xmm1,xmm0
  movaps    xmm3,xmm2
  shufps    xmm0,xmm0,0
  shufps    xmm1,xmm1,11111111b
  shufps    xmm2,xmm2,0
  shufps    xmm3,xmm3,11111111b
  mulps     xmm0,.a1       ;   pz = a*z1 + b*z2 + c*z3 + d*z4
  mulps     xmm1,.b1
  mulps     xmm2,.c1
  mulps     xmm3,.d1
                          ;   (same weights as for px and py)
  addps     xmm0,xmm1
  addps     xmm2,xmm3
  addps     xmm0,xmm2         ; xm0 = pz for t1..t4
  ; ---- interleave into x,y,z triples; the 16 byte stores overlap, each
  ; following store overwrites the extra dword of the previous one
  movhlps   xmm5,xmm7         ; xm5 low half = px2,py2
  movhlps   xmm4,xmm6         ; xm4 low half = px4,py4
  movlhps   xmm4,xmm6         ; xm4 = px4,py4,px3,py3
  movlhps   xmm7,xmm0         ; xm7 = px1,py1,pz1,pz2
  shufps    xmm5,xmm0,11010100b ; xm5 = px2,py2,pz2,pz4
  movups    [edi],xmm7        ; point 1
  movups    [edi+12],xmm5     ; point 2
  movaps    xmm6,xmm0
  movhlps   xmm6,xmm4         ; xm6 = px3,py3,pz3,pz4
  movups    [edi+24],xmm6     ; point 3
  movlps    [edi+36],xmm4     ; point 4: x, y

  shufps    xmm0,xmm0,11111111b
  movss     [edi+44],xmm0     ; point 4: z


ret
;=================================================================

draw_long_pipe_derv_lines:
; Draw a line from every derive vertex to the vertex that follows it,
; one line_grd_tex call per vertex.
; in esi - derives vertices, each derive 4 vertices,
;          each derive vertex 3xdword float
;    ecx - vertices (!not derives!) count
;    edi - screen buffer ptr
;    eax - screen width
; derives list must be scaled and translated into screen coords
;    edx = marker if 'add' => add value in xmm1, to index in xmm3
; NOTE(review): edx and xmm3 are only stored into .mark / .index here, the
; 'add' behaviour described above is not visible in this procedure.
; NOTE(review): the last pass reads the x,y of the vertex behind the last
; counted one ([esi+12]) - the list presumably carries a spare vertex,
; verify against the caller.

     push      ebp
     mov       ebp,esp
     sub       esp,56

     .col       equ dword[ebp-4]
     .yres      equ dword[ebp-8]
     .xres      equ [ebp-12]
     .scr       equ [ebp-16]              ; .scr           \  dont change order
     .zbuff     equ [ebp-20]              ; .zbu            \
     .tex_ptr   equ [ebp-24]              ; .linetexptr     /
     .width     equ [ebp-28]              ; .width         /
     .mark      equ [ebp-32]
     .index     equ [ebp-36]
     .cnt       equ [ebp-40]
     .add_val   equ [ebp-56]

     mov       .mark,edx
     movss     .index,xmm3
     mov       .col,0x0000ff00
     mov       .width,eax
     mov       .scr,edi
     xor        eax,eax
     xorps      xmm6,xmm6
     ; the two words at xres_vard are widened to dwords and fill
     ; .xres and .yres with one 8 byte store
     movd       xmm7,[xres_vard]
     punpcklwd  xmm7,xmm6
     movlps     .xres,xmm7
     mov        .tex_ptr,dword 0x0000ff00
     mov        .cnt,eax
  .aagain:
     push       ecx
     push       esi
     movlps     xmm0,[esi]         ; x,y of this vertex
     mov        eax,.index         ; value is replaced by the pop below
     movhps     xmm0,[esi+12]      ; x,y of the next vertex

     ; convert the four coordinates to integers and move them through
     ; the stack into eax, ebx (first point) and ecx, edx (second point)
     cvtps2dq   xmm0,xmm0
     sub        esp,16
     movups     [esp],xmm0
     pop        eax ebx ecx edx
     xorps      xmm6,xmm6
     ; xm5 = .width, .tex_ptr, .zbuff, .scr in one load (hence the fixed
     ; order of these locals); .zbuff is not written in this procedure
     movups     xmm5,.width
     movlps     xmm7,.xres
     mov        edi,plain_horizontal
     call       line_grd_tex
     pop        esi
     pop        ecx
     add        esi,12
     loop       .aagain
     mov        esp,ebp
     pop        ebp


ret
;======================================================================
rotate_long_pipe_derv:
; Rotate, scale and translate the Bspline/Bezier derives.
; Works only on global data:
;   main_rotary            - three rotation values handed to make_matrixx
;   scale                  - handed to add_scale_to_matrix
;   matrix_scaled          - receives the resulting matrix
;   long_pipe_derv         - source vertices
;   long_pipe_derv_rotated - destination vertices
;   lpipe_derv_size        - vertices count given to rotary
;   xxadd, zero_hgst       - integer translation and its lane mask
; The translation pass runs over 2 * [lpipe_derv_size] vertices.

     .mxs     equ  [ebp-4]

     push     ebp
     mov      ebp,esp
     push     dword matrix_scaled      ; this stack slot is the local .mxs

     ; ---- rotation matrix from the three values at main_rotary
     mov      edi,.mxs
     mov      esi,main_rotary
     mov      ebx,[esi]
     mov      edx,[esi+4]
     mov      eax,[esi+8]
     call     make_matrixx

     ; ---- apply the scale to that matrix
     mov      ebx,scale                ; ebx - ptr to scale
     mov      esi,.mxs                 ; esi - pointer to 3x3 matrix
     call     add_scale_to_matrix

     ; ---- rotate the derives
     mov      ecx,[lpipe_derv_size]
     mov      ebx,.mxs
     mov      edi,long_pipe_derv_rotated
     mov      esi,long_pipe_derv
     call     rotary

     ; ---- translate the rotated derives
     movups   xmm3,[xxadd]
     cvtdq2ps xmm3,xmm3
     andps    xmm3,[zero_hgst]
     mov      ebx,long_pipe_derv_rotated
     mov      ecx,[lpipe_derv_size]
     add      ecx,ecx
   .rlp_translate:
     movups   xmm1,[ebx]
     addps    xmm1,xmm3
     movups   [ebx],xmm1
     add      ebx,12
     loop     .rlp_translate

     leave
ret

;======================================================================

do_long_pipe:
; Build a pipe mesh around a curve: for every curve vertex a ring of
; "rotary steps" vertices is generated, then neighbouring rings are
; joined with two triangles per step.
; in:  esi  - ptr to curve vertices - middle points of pipe
;      ecx  - curve vertices count
;      eax  - rotary steps count as integer
;      edx  = 'sold' - do object with wall based on rotated curve
;      xmm0 - radius, lowest dword float
;      ebx  - ptr to triangles list
;      edi  - ptr to pipe vertices
; out: ecx  - triangles count
;      ebx  - vertices count
; NOTE(review): edx is overwritten with [lpipe_flag] before it is read, so
; the mode is taken from that global; value 3 selects the "rotated curve"
; variant.
; NOTE(review): in the normal variant every ring reads the two curve
; vertices that follow the current one, and the triangle loop runs
; (curve vertices count - 2) times - counts below 3 are not handled.
; I am pretty sure that this pipe routines can be done
; more simply using cylinder equations. This can be
; shorter, faster and more nice. Feel free to make your
; own code instead mine - poor and humble.

  push ebp
  mov  ebp,esp
  sub  esp,199
  and  ebp,-16         ; locals are addressed from a 16 byte aligned base

  .normalize      equ dword[ebp-4]      ; address of normalize_vector, called indirectly
  .curve_vert_c   equ      [ebp-8]
  .radius         equ      [ebp-16]
  .rot_steps_c    equ      [ebp-20]
  .one_deg        equ      [ebp-24]     ; angle of one rotary step
  .vert1          equ      [ebp-40]
  .vert2          equ      [ebp-56]     ; rotation axis given to make_arbitrary_mx
  .cosinus        equ      [ebp-60]
  .sinus          equ      [ebp-64]
  .radius_vec     equ      [ebp-80]
  .counter_edi    equ      [ebp-84]     ; rotary step counter inside one ring
  .xedi           equ dword[ebp-88]     ; current output vertex ptr
  .xesi           equ dword[ebp-92]     ; current curve vertex ptr
  .tri_list       equ dword[ebp-96]
  .point_to_rot   equ      [ebp-112]
  .vert_cnt       equ dword[ebp-116]
  .counter        equ dword[ebp-120]
  .axx            equ dword[ebp-124]
  .flag           equ byte [ebp-125]
  .mx             equ      [ebp-161]    ; not 16 byte aligned, only used through pointers
  .vert3          equ      [ebp-177]    ; not 16 byte aligned, accessed with movups

     mov       .normalize,normalize_vector
     movzx     edx,[lpipe_flag]
     mov       .flag,dl   ; object with wall based on rotated spline
 ;  .ff:
     mov       .curve_vert_c,ecx
     mov       .tri_list,ebx
     mov       .rot_steps_c,eax
     movss     .radius,xmm0
     mov       .xesi,esi
     mov       .xedi,edi
     ; .one_deg = two_pi / rotary steps count
     movss     xmm0,[two_pi]
     cvtsi2ss  xmm1,.rot_steps_c
     divss     xmm0,xmm1
     movss     .one_deg,xmm0
     xor       ebx,ebx
     mov       .counter,ebx
     mov       .vert_cnt,ebx
     ; .vert3 = (0, 0, -1, 0), fixed helper vector for the cross products
     xorps     xmm0,xmm0
     or        eax,-1
     cvtsi2ss  xmm0,eax
     shufps    xmm0,xmm0,11001111b
     movups    .vert3,xmm0
     cmp       dl,3
     jne       .loop2
     ; flag = 3: the rotation axis is fixed for the whole object. It is
     ; built from the first two words at NextMxadd minus BXRES/2, the
     ; upper two lanes are taken from the_one, then it is normalized.
     movlps    xmm0,[NextMxadd]
     xorps     xmm1,xmm1
     mov       eax,(BXRES/2) shl 16 + (BXRES/2)
     punpcklwd xmm0,xmm1
     movd      xmm2,eax
     punpcklwd xmm2,xmm1
     psubd     xmm0,xmm2
     cvtdq2ps  xmm0,xmm0
     movhps    xmm0,[the_one]
;     shufps    xmm0,xmm0,11000100b
     movups    .vert2,xmm0
     lea       edi,.vert2
     call      .normalize ;_vector
   ; ---- outer loop: one pass per curve vertex
   .loop2:
     mov       esi,.xesi
     movups    xmm1,[esi]
     cmp       .flag,3
     je        .po_tr       ; flag = 3: the curve vertex itself is the point to rotate
;      xorps     xmm0,xmm0
;      or        eax,-1
;      cvtsi2ss  xmm0,eax
;      shufps    xmm0,xmm0,11001111b
;      movups    .vert2,xmm0
;      movaps    xmm2,xmm1
;      movhlps   xmm3,xmm1
;      shufps    xmm2,xmm2,11100001b
;      maxps     xmm2,xmm1
;      ucomiss   xmm2,xmm3        ; i am not sure if this
;      jnb       .f2              ; pipe is more smooth...
;      shufps    xmm0,xmm0,11111011b
;      movups    .vert2,xmm0
;    .f2:

     ; .vert1 = next vertex - this vertex
     ; .vert2 = next vertex - vertex after next   (becomes the rotation axis)
     mov      esi,.xesi
     lea      edi,[esi+12]
     movups   xmm0,[edi]
     movups   xmm1,[esi]
     movups   xmm3,[esi+24]
     movaps   xmm2,xmm0
     subps    xmm0,xmm1
     movups   .vert1,xmm0
     subps    xmm2,xmm3
     movups   .vert2,xmm2

     lea      edi,.vert1
     call     .normalize
     lea      edi,.vert2
     call     .normalize

     ; .radius_vec = normalized result of cross_reg(.vert3, .vert1)
     ; (cross_reg presumably returns the cross product of xmm0 and xmm1
     ;  in xmm0 - verify against its definition)
     movups   xmm0,.vert3
     movups   xmm1,.vert1
     call     cross_reg
     lea      edi,.radius_vec
     movups   [edi],xmm0

     call     .normalize  ;_vector

     ; .vert1 = normalized result of cross_reg(.vert2, .radius_vec)
     movups   xmm0,.vert2
     movups   xmm1,.radius_vec
     call     cross_reg
     lea      edi,.vert1
     movups   [edi],xmm0

     call     .normalize
     ; point to rotate = .vert1 scaled by the radius
     movlps   xmm1,.radius
     movups   xmm2,.vert1
     shufps   xmm1,xmm1,0
     mulps    xmm1,xmm2
   .po_tr:
     movaps   .point_to_rot,xmm1
     xor      ebx,ebx
     mov      .counter_edi,ebx  ; rotary counter
   ; ---- inner loop: one output vertex per rotary step
   .loop1:
     ; angle = step counter * .one_deg
     cvtpi2ps xmm0,.counter_edi
     mulss    xmm0,.one_deg
     call     sin_cos
     movlps   .sinus,xmm0       ; fills .sinus and .cosinus with one 8 byte store
     ; matrix for a rotation by that angle around .vert2
     lea      esi,.sinus
     lea      edi,.mx
     lea      ebx,.vert2
     call     make_arbitrary_mx
     lea      esi,.point_to_rot   ; esi - point to rotate
     mov      edi,.xedi
     mov      ecx,1
     lea      ebx,.mx
     call     rotary  ;mul_matrix
     mov      esi,.xesi
     mov      edi,.xedi
     movups   xmm0,[edi]  ; translate point by vector [esi]
     movups   xmm1,[esi]
     addps    xmm0,xmm1
     movlps   [edi],xmm0      ; store x, y
     movhlps  xmm0,xmm0
     movss    [edi+8],xmm0    ; store z
     inc      .vert_cnt
     add      .xedi,12
     inc      dword .counter_edi
     mov      ecx,.counter_edi
     cmp      ecx,.rot_steps_c
     jnz      .loop1
     add      .xesi,12
     inc      .counter
     mov      ecx,.counter
     cmp      ecx,.curve_vert_c
     jnz      .loop2
     ; do tri  angles list
     ; Each triangle is three dword vertex indices (12 bytes). eax is the
     ; running vertex index, ebx the ring size, so eax + ebx is the
     ; matching vertex of the next ring. .counter is reused as the
     ; triangles counter.
     xor    ebx,ebx
     mov    .counter,ebx  ;18
     mov    esi,.tri_list
     mov    ebx,.rot_steps_c
     xor    eax,eax
     mov    ecx,.curve_vert_c
     sub    ecx,2
    .oop:
     push   ecx
     mov    ecx,.rot_steps_c
     mov    .axx,eax  ; .axx - variable nedeed to last triangle
     dec    ecx       ;  of turn
    @@:
     ; two triangles between this step and the next one:
     ;   (i, i+1, i+ring)  and  (i+ring+1, i+ring, i+1)
     mov    dword[esi],eax
     mov    edx,eax
     add    edx,ebx
     inc    eax
     mov    dword[esi+4],eax
     mov    dword[esi+8],edx
     mov    dword[esi+16],edx
     inc    edx
     mov    dword[esi+12],edx
     mov    dword[esi+20],eax
     add    .counter,2
     add    esi,24
     loop   @b

     ; closing pair of the ring: the second triangle goes back to the
     ; first vertex of the ring (.axx) instead of index + 1
     mov    dword[esi],eax
     mov    edx,eax
     add    edx,ebx
     inc    eax
     mov    dword[esi+4],eax
     mov    dword[esi+8],edx
     mov    ecx,.axx      ; last  triangle - special cause
     mov    [esi+12],ecx
     add    ecx,ebx
     mov    [esi+16],ecx
     dec    ecx
     mov    [esi+20],ecx
     add    .counter,2
     add    esi,24
     pop    ecx
     loop   .oop
     mov    ecx,.counter
     mov    ebx,.vert_cnt

   ; Vertices and triangles describing pipe done.
   ; ecx, ebx ->  triangles count and vertices count of pipe.

     add    esp,199
     pop    ebp
ret
